In [10]:
    
import os, sys
import graphlab as gl
import graphlab.aggregate as agg
from tqdm import tqdm_notebook as tqdm
# set canvas path
# gl.canvas.set_target('ipynb')
%matplotlib inline
import matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
    
In [3]:
    
# Load the house-sales data from the local GraphLab SFrame directory.
# Uses the `gl` alias: the import cell binds only `gl` (and `agg`), so the
# bare name `graphlab` is undefined on a fresh kernel.
sales = gl.SFrame('data/home_data.gl/')
    
In [4]:
    
# Bare expression: the notebook renders the loaded SFrame as this cell's output.
sales
    
    Out[4]:
In [21]:
    
# Scatter of price against living area. fit_reg=False leaves out the
# regression line, so only the raw points are drawn.
sns.lmplot(data=sales.to_dataframe(), x='sqft_living', y='price', fit_reg=False)
    
    Out[21]:
    
In [22]:
    
# Split the sales data with fraction 0.8 and a fixed seed so the split is repeatable.
(train_dataset, test_dataset) = sales.random_split(0.8, seed=0)
    
In [23]:
    
# Linear regression of price on the single feature sqft_living,
# fitted on the training split.
sqft_model = gl.linear_regression.create(
    train_dataset,
    target='price',
    features=['sqft_living']
)
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
In [24]:
    
# Mean of the price column over the test split.
print(test_dataset['price'].mean())
    
    
In [25]:
    
# Evaluate the single-feature model on the held-out test split.
print(sqft_model.evaluate(test_dataset))
    
    
In [28]:
    
# Plot the test-set prices (dots) together with the single-feature model's
# predictions (line) against living area.
plt.figure(num=1, figsize=(15, 10), dpi=80)
axis_to_work = plt  # alias for pyplot; name kept in case other cells reference it
axis_to_work.plot(
    test_dataset['sqft_living'], test_dataset['price'], '.',
    test_dataset['sqft_living'], sqft_model.predict(test_dataset), '-'
)
# Remove the top/right spines BEFORE showing the figure: once show() has
# rendered the figure, despine no longer affects what was displayed.
sns.despine(top=True, right=True)
axis_to_work.show()
    
    
    
In [29]:
    
# Look up the 'coefficients' entry of the fitted single-feature model;
# as the cell's last expression, the result is shown as the output.
sqft_model.get('coefficients')
    
    Out[29]:
In [30]:
    
# Column names used as inputs for the multi-feature regression model.
my_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
    
In [37]:
    
# Convert the SFrame to a pandas DataFrame once. The conversion does not
# depend on the loop variable, so repeating it per feature was wasted work.
sales_df = sales.to_dataframe()

# One small price-vs-feature scatter plot per entry in my_features.
for feature in my_features:
    sns.lmplot(
        x=feature,
        y='price',
        data=sales_df,
        fit_reg=False,  # no regression line
        size=3
    )
    
    
    
    
    
    
    
In [38]:
    
# Linear regression of price on every column listed in my_features,
# fitted on the training split.
# Uses the `gl` alias: the import cell never binds the bare name `graphlab`,
# so the original call fails with a NameError on a fresh kernel.
my_features_model = gl.linear_regression.create(
    train_dataset,
    target='price',
    features=my_features
)
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
In [39]:
    
# Evaluate both models on the same test split, single-feature model first.
for fitted_model in (sqft_model, my_features_model):
    print(fitted_model.evaluate(test_dataset))
    
    
In [40]:
    
# Select the row(s) of `sales` whose id equals '5309101200'.
house1_mask = sales['id'] == '5309101200'
house1 = sales[house1_mask]
    
In [41]:
    
# Bare expression: show the selected house1 row(s) as this cell's output.
house1
    
    Out[41]:
In [42]:
    
# Recorded price for house1.
print(house1['price'])
    
    
In [43]:
    
# Single-feature model's prediction for house1.
print(sqft_model.predict(house1))
    
    
In [44]:
    
# Multi-feature model's prediction for house1.
print(my_features_model.predict(house1))
    
    
In [45]:
    
# Select the row(s) of `sales` whose id equals '1925069082'.
house2_mask = sales['id'] == '1925069082'
house2 = sales[house2_mask]
    
In [46]:
    
# Bare expression: show the selected house2 row(s) as this cell's output.
house2
    
    Out[46]:
In [47]:
    
# Recorded price for house2.
print(house2['price'])
    
    
In [48]:
    
# Single-feature model's prediction for house2.
print(sqft_model.predict(house2))
    
    
In [49]:
    
# Multi-feature model's prediction for house2.
print(my_features_model.predict(house2))
    
    
In [50]:
    
# It was Bill Gates's house.
    
In [55]:
    
# Select the row(s) of `sales` whose id equals '5309101200'.
# NOTE(review): this is the same id that house1 is selected with, so house3
# holds the same row(s) as house1 -- confirm whether a different id was intended.
house3_mask = sales['id'] == '5309101200'
house3 = sales[house3_mask]
    
In [56]:
    
# Bare expression: show the selected house3 row(s) as this cell's output.
house3
    
    Out[56]:
In [57]:
    
# Recorded price for house3. The original printed house2 here, repeating an
# earlier cell verbatim even though this cell follows the house3 selection.
print(house3['price'])
    
    
In [58]:
    
# Single-feature model's prediction for house3. The original passed house2
# here, repeating an earlier cell instead of using the house3 selection.
print(sqft_model.predict(house3))
    
    
In [59]:
    
# Multi-feature model's prediction for house3. The original passed house2
# here, repeating an earlier cell instead of using the house3 selection.
print(my_features_model.predict(house3))